Antonio Coín Castro
# -- Libraries
import os
import pickle

import arviz as az
import numpy as np
import pandas as pd
import skfda
from IPython.display import display
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
from scipy.special import logsumexp
from skfda.preprocessing.smoothing import BasisSmoother
from skfda.preprocessing.smoothing.kernel_smoothers import \
    NadarayaWatsonSmoother as NW
from skfda.representation.basis import BSpline, Fourier
from skfda.representation.grid import FDataGrid
from sklearn.model_selection import KFold, train_test_split

import bayesian_model
import mle
import plot_utils
import preprocessing
import simulation
import utils
from _fpls import FPLSBasis
# -- Configuration
# Extensions
# NOTE: lines starting with '%' are IPython magics; this file is a notebook
# export and must be run inside an IPython/Jupyter session, not as a script.
%load_ext autoreload
%autoreload 2
# Plotting configuration
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = [6, 4]  # default figure size (inches)
plt.style.use('arviz-darkgrid')
# Default number of columns in grid plots, and the matching row count.
NCOLS = 3


def NROWS(x, ncols=NCOLS):
    """Return the number of rows needed to fit ``x`` plots in ``ncols`` columns."""
    ratio = x / ncols
    return np.ceil(ratio).astype('int')
# Randomness and reproducibility
SEED = 42
np.random.seed(SEED)  # legacy global seed (some libraries still rely on it)
rng = np.random.default_rng(SEED)  # Generator passed explicitly to samplers
# Floating point precision for display
np.set_printoptions(precision=3, suppress=True)
pd.set_option("display.precision", 3)
# Multiprocessing
N_CORES = 4  # number of parallel workers for CV, MLE and the samplers
# Ignore warnings
# NOTE(review): silencing overflow/divide errors globally can mask genuine
# numerical issues elsewhere — deliberate here, but keep it in mind.
np.seterr(over='ignore', divide='ignore')
os.environ["PYTHONWARNINGS"] = 'ignore::UserWarning'
We consider the model
$$ Y = \alpha_0 + \Psi^{-1}_{X}(\alpha) + \varepsilon, $$ i.e.,
$$ Y_i\mid X_i=x_i \sim \mathcal N\left(\alpha_0 + \sum_{j=1}^p \beta_jx_i(\tau_j), \ \sigma^2\right). $$The prior distributions we choose are:
\begin{align*} \pi(\alpha_0, \sigma^2) & \propto 1/\sigma^2, \\ \tau & \sim \mathscr U([0, 1]^p), \\ \beta\mid \tau, \sigma^2 & \sim \mathcal N\left(b_0, g\sigma^2\left[\mathcal X_\tau' \mathcal X_\tau + \eta \lambda_{\text{max}}(\mathcal X_\tau' \mathcal X_\tau)I\right]^{-1}\right), \end{align*}Note that for computational reasons we will work with $\log \sigma$ instead of $\sigma^2$, and hence the associated prior distribution is
$$ \pi(\alpha_0, \log\sigma) \propto 1. $$Writing the parameter vector as $\theta = (\beta, \tau, \alpha_0, \log\sigma)$, the joint posterior probability is:
$$ \pi(\beta, \tau, \alpha_0, \log\sigma\mid Y) \propto \frac{|G_\tau|^{1/2}}{\sigma^{p+n}} \exp\left\{ -\frac{1}{2\sigma^2} \left(\|Y- \alpha_0\boldsymbol{1} - \mathcal X_\tau\beta\|^2 + \frac{1}{g}(\beta - b_0)'G_\tau(\beta - b_0) \right) \right\}. $$Hence, the log-posterior probability is:
$$ \log \pi(\beta, \tau, \alpha_0, \log\sigma\mid Y) \propto \frac{1}{2}\log |G_\tau| - (p+n)\log \sigma -\frac{1}{2\sigma^2} \left(\|Y-\alpha_0\boldsymbol{1} - \mathcal X_\tau\beta\|^2 + \frac{1}{g}(\beta - b_0)'G_\tau(\beta - b_0) \right). $$The metrics considered for model evaluation will be:
We generate a toy dataset with $n=100$ functional regressors $X_i(t) \sim GP(0, K(s, t))$, a response variable given by either a $L^2$ model or a "simple" RKHS function, a value of $\alpha_0=5$ and a variance of $\sigma^2=0.5$. More precisely, we choose one of
$$ Y_i \sim \mathcal N\big(5 -5X_i(0.1) + 10X_i(0.8), \ 0.5\big) $$ or
$$ Y_i \sim \mathcal N\left(5 + \int_0^1 \beta(t)X_i(t)\, dt, \ 0.5\right), $$where $\beta \in L^2[0, 1]$.
We consider a regular grid of $N=100$ points on $[0, 1]$. In addition, we center the $X_i$ so that they have zero mean when fed to the sampling algorithms.
We also generate a test dataset with $n_{\text{test}}=50$ regressors for model evaluation.
# -- Data generation parameters
SYNTHETIC_DATA = True  # use the simulated GP dataset instead of real data
MODEL_GEN = "RKHS"  # 'L2' or 'RKHS' (generating model for synthetic data)
REAL_DATA = "Aemet"  # 'Tecator' or 'Aemet'; used when SYNTHETIC_DATA is False
INITIAL_SMOOTHING = 'NW'  # None, 'NW' or 'Basis'
N_BASIS = 16  # number of B-spline basis functions for 'Basis' smoothing
SCALE_PREDICTORS = False
STANDARDIZE_RESPONSE = False
kernel_fn = simulation.fractional_brownian_kernel  # GP covariance kernel
beta_coef = simulation.cholaquidis_scenario3  # beta(t) for the L2 model
basis = BSpline(n_basis=N_BASIS)
smoothing_params = np.logspace(-4, 4, 50)  # candidate smoothing parameters
n_train, n_test = 100, 50
N = 100  # number of points in the discretization grid
tau_range = (0, 1)  # domain of the functional covariates
# -- Dataset generation
if SYNTHETIC_DATA:
    # Regular grid on (0, 1]: the left endpoint is shifted by 1/N
    grid = np.linspace(tau_range[0] + 1./N, tau_range[1], N)
    # True parameters of the generating model
    beta_true = [-5., 10.]
    tau_true = [0.1, 0.8]
    alpha0_true = 5.
    sigma2_true = 0.5
    if MODEL_GEN == "L2":
        # Y = alpha0 + \int beta(t) X(t) dt + noise
        x, y = simulation.generate_gp_l2_dataset(
            grid,
            kernel_fn,
            n_train + n_test,
            beta_coef,
            alpha0_true,
            sigma2_true,
            rng=rng
        )
    elif MODEL_GEN == "RKHS":
        # Y = alpha0 + sum_j beta_j X(tau_j) + noise
        x, y = simulation.generate_gp_rkhs_dataset(
            grid,
            kernel_fn,
            n_train + n_test,
            beta_true,
            tau_true,
            alpha0_true,
            sigma2_true,
            rng=rng
        )
    else:
        raise ValueError("Invalid model generation strategy.")
    # Train/test split
    X, X_test, Y, Y_test = train_test_split(
        x, y, train_size=n_train, random_state=SEED)
    # Create FData object
    X_fd = skfda.FDataGrid(X, grid)
    X_test_fd = skfda.FDataGrid(X_test, grid)
else:  # Real data
    if REAL_DATA == "Tecator":
        x, y = skfda.datasets.fetch_tecator(return_X_y=True)
        y = np.sqrt(y[:, 1])  # Sqrt-Fat
    elif REAL_DATA == "Aemet":
        data = skfda.datasets.fetch_aemet()['data']
        data_matrix = data.data_matrix
        temperature = data_matrix[:, :, 0]
        x = FDataGrid(temperature, data.grid_points)
        # Log of total precipitation for each station, computed from the
        # stored log-precipitation curves. Uses the numerically stable
        # log-sum-exp: np.log(np.exp(.).sum()) can overflow, and the
        # np.seterr(over='ignore') above would silently turn that into inf.
        y = logsumexp(data_matrix[:, :, 1], axis=1)
    else:
        raise ValueError("REAL_DATA must be 'Tecator' or 'Aemet'.")
    X_fd, X_test_fd, Y, Y_test = train_test_split(
        x, y, train_size=0.8, random_state=SEED)
    N = len(X_fd.grid_points[0])
    grid = utils.normalize_grid(x.grid_points[0], tau_range[0], tau_range[1])
    n_train, n_test = len(X_fd.data_matrix), len(X_test_fd.data_matrix)
# Smooth data
if INITIAL_SMOOTHING is not None:
    if INITIAL_SMOOTHING == "NW":
        smoother = NW()  # Nadaraya-Watson kernel smoother
    elif INITIAL_SMOOTHING == "Basis":
        smoother = BasisSmoother(basis)  # B-spline basis smoother
    else:
        raise ValueError(
            f"Expected 'NW' or 'Basis' but got {INITIAL_SMOOTHING}.")
    # Select the smoothing parameter among 'smoothing_params'
    # (presumably via CV inside preprocessing.smooth_data — see that module)
    X_fd, best_smoother, X_test_fd = preprocessing.smooth_data(
        X_fd,
        smoother,
        smoothing_params,
        X_test_fd
    )
    print(
        "Smoother: {}".format(
            best_smoother.best_estimator_.__class__.__name__))
    print(
        "Smoothing parameter: {:.3f}".format(
            best_smoother.best_params_['smoothing_parameter']))
# Standardize data
# Center (and optionally scale, per SCALE_PREDICTORS) the predictors
X_fd, X_test_fd = preprocessing.standardize_predictors(
    X_fd, X_test_fd, SCALE_PREDICTORS)
if STANDARDIZE_RESPONSE:
    Y, Y_test = preprocessing.standardize_response(Y, Y_test)
# Get data matrices
# Flatten the functional data into plain (n_samples, N) design matrices
X = X_fd.data_matrix.reshape(-1, N)
X_test = X_test_fd.data_matrix.reshape(-1, N)
# -- Dataset visualization
plot_utils.plot_dataset_regression(
    X,
    Y,
    # Plot fewer curves for the synthetic dataset to reduce clutter
    n_samples=n_train if not SYNTHETIC_DATA else n_train//2
)
In our algorithms, we consider an unconstrained transformed parameter space $\tilde \Theta=\mathbb{R}^{2\hat p+2}$ via suitable bijections (e.g. the logit and log transformations set up in the code below).
# -- Model hyperparameters
p_max = 3  # maximum number of impact points tau_j
g = 5  # scale of the g-prior on beta (see prior formulas above)
eta = 0.1  # stabilization factor multiplying lambda_max in the prior cov.
mle_method = 'L-BFGS-B'  # 'Nelder-Mead', 'Powell' or 'L-BFGS-B'
mle_strategy = 'global'
# Prior probability mass on each value of p (used only when p is sampled)
prior_p = {
    1: 0.10,
    2: 0.60,
    3: 0.30,
}
beta_range = (-500, 500)  # range considered for the beta coefficients
INCLUDE_P = False  # whether p is itself a sampled parameter
TRANSFORM_TAU = False  # sample tau through a logit bijection
TRANSFORM_SIGMA = True  # work with log(sigma) instead of sigma^2
FIT_SK = True  # fit the sklearn-style comparison regressors
COMPUTE_MLE = True  # compute the MLE of the linear model
# -- Names and labels
# Parameter names and grouped LaTeX labels, with "p" prepended when p is
# itself a sampled parameter.
theta_names = ["β", "τ", "α0", "σ2"]
theta_labels_grouped = [r"$\beta$", r"$\tau$", r"$\alpha_0$", r"$\sigma^2$"]
if INCLUDE_P:
    theta_names = ["p", *theta_names]
    theta_labels_grouped = [r"$p$", *theta_labels_grouped]
# Individual labels: beta_1..beta_p, tau_1..tau_p, alpha_0, sigma^2
theta_labels = [theta_labels_grouped[0]] if INCLUDE_P else []
theta_labels += [fr"$\beta_{j}$" for j in range(1, p_max + 1)]
theta_labels += [fr"$\tau_{j}$" for j in range(1, p_max + 1)]
theta_labels += theta_labels_grouped[-2:]
# Labels for Arviz: pretty names for alpha0/sigma2 and 1-based coordinates
theta_labeller = az.labels.MapLabeller(
    var_name_map={name: label for name, label
                  in zip(theta_names[-2:], theta_labels_grouped[-2:])},
    coord_map={"vector": {idx: idx + 1 for idx in np.arange(p_max)}}
)
# -- Parameter space and miscellaneous
# Bijections mapping the constrained parameters to an unconstrained space
if TRANSFORM_TAU:
    tau_ttr = bayesian_model.Logit()
else:
    tau_ttr = bayesian_model.Identity()
if TRANSFORM_SIGMA:
    sigma2_ttr = bayesian_model.LogSq()  # sample log(sigma) internally
else:
    sigma2_ttr = bayesian_model.Identity()
# Parameter space
theta_space = bayesian_model.ThetaSpace(
    p_max,
    grid,
    include_p=INCLUDE_P,
    names=theta_names,
    labels=theta_labels,
    labeller=theta_labeller,
    tau_range=tau_range,
    beta_range=beta_range,
    tau_ttr=tau_ttr,
    sigma2_ttr=sigma2_ttr
)
# Statistics for posterior predictive checks
# (name, callable) pairs used later to compute Bayesian p-values
point_estimators_bpv = [
    ("min", np.min),
    ("max", np.max),
    ("median", np.median),
    ("mean", np.mean),
    ("std", np.std)
]
# Folds for CV
folds = KFold(shuffle=True, random_state=SEED)  # default n_splits=5
# -- Select family of regressors
# Hyperparameter grids for the sklearn-style comparison models
alphas = np.logspace(-4, 4, 20)  # regularization strengths
n_selected = [5, 10, 15, 20, 25, X.shape[1]]  # variables kept by selector
n_components = [2, 3, 4, 5, 10]
n_basis_bsplines = [8, 10, 12, 14, 16]
n_basis_fourier = [3, 5, 7, 9, 11]
n_neighbors = [3, 5, 7]
basis_bspline = [BSpline(n_basis=p) for p in n_basis_bsplines]
basis_fourier = [Fourier(n_basis=p) for p in n_basis_fourier]
# FPLS bases are data-driven and may fail to build for some sizes
basis_fpls = []
for p in n_components:
    try:
        basis_fpls.append(FPLSBasis(X_fd, Y, n_basis=p))
    except ValueError:
        print(f"Can't create FPLSBasis with n_basis={p}")
        continue
params_regularizer = {"reg__alpha": alphas}
params_svm = {"reg__C": alphas,
              "reg__gamma": ['auto', 'scale']}
params_select = {"selector__p": n_selected}
params_pls = {"reg__n_components": n_components}
params_dim_red = {"dim_red__n_components": n_components}
params_basis = {"basis__basis": basis_bspline + basis_fourier}
params_basis_fpca = {"basis__n_basis": n_components}
params_basis_fpls = {"basis__basis": basis_fpls}
params_knn = {"reg__n_neighbors": n_neighbors,
              "reg__weights": ['uniform', 'distance']}
params_mrmr = {"var_sel__method": ["MID", "MIQ"],
               "var_sel__n_features_to_select": n_components}
# Build the suite of regressors to compare against.
# NOTE(review): params_basis_fpca, params_basis_fpls and params_mrmr are
# defined above but not passed here — confirm whether that is intentional.
regressors = utils.linear_regression_comparison_suite(
    params_regularizer,
    params_select,
    params_dim_red,
    params_svm,
    params_basis,
    params_pls,
    params_knn,
    random_state=SEED
)
# -- Fit models and show metrics
if FIT_SK:
    # Cross-validate the comparison regressors and evaluate on the test set
    df_metrics_sk, reg_cv = utils.cv_sk(
        regressors,
        X_fd,
        Y,
        X_test_fd,
        Y_test,
        folds,
        n_jobs=N_CORES,
        verbose=True
    )
    display(df_metrics_sk.style.hide_index())
# -- MLE computation
if COMPUTE_MLE:
    print(f"-- Computing MLE with {N_CORES} independent runs --")
    # Work with p fixed to p_max for the MLE
    theta_space_fixed = theta_space.copy_p_fixed()
    mle_theta, bic = mle.compute_mle(
        X,
        Y,
        theta_space_fixed,
        kind='linear',
        method=mle_method,
        strategy=mle_strategy,
        n_jobs=N_CORES,
        rng=rng
    )
    # Noiseless test-set predictions using the MLE parameters
    Y_pred_mle = bayesian_model.generate_response_linear(
        X_test, mle_theta, theta_space_fixed, noise=False
    )
    df_metrics_mle = utils.linear_regression_metrics(
        Y_test,
        Y_pred_mle,
        theta_space.p_max,
        "mle"
    )
    print(f"\nBIC [p={p_max}]: {bic:.3f}")
    display(pd.DataFrame(zip(theta_space_fixed.labels, mle_theta),
                         columns=["", "MLE"]).style.hide_index())
    print("Regression metrics:")
    display(df_metrics_mle.style.hide_index())
from mcmc_sampler import BayesianLinearRegressionEmcee
import emcee
We set up the initial points of the chains to be in a random neighbourhood around the MLE to increase the speed of convergence.
# -- Sampler parameters
n_walkers = 64  # number of chains in the ensemble
n_iter_warmup = 100  # warm-up iterations
n_iter = 1000  # iterations kept per walker
compute_pp = True  # generate posterior predictive samples while fitting
compute_ll = False  # skip pointwise log-likelihood computation
# Fraction of walkers initialized at random (presumably the rest start
# near the MLE, per the note above — see the sampler implementation)
frac_random = 0.3
# Mixture of emcee proposal moves with their selection probabilities
moves = [
    (emcee.moves.StretchMove(), 0.7),
    (emcee.moves.WalkMove(), 0.3)
]
thin = 1  # keep every posterior sample
thin_pp = 5  # keep every 5th posterior predictive sample
FAST_RUN = True
COMPUTE_METRICS_FAST_RUN = True
# -- Run sampler
print("-- Running affine-invariant ensemble sampler "
      f"with {N_CORES} cores --")
reg_emcee = BayesianLinearRegressionEmcee(
    theta_space,
    n_walkers,
    n_iter,
    b0='mle',
    g=g,
    eta=eta,
    prior_p=prior_p,
    n_iter_warmup=n_iter_warmup,
    initial_state='mle',
    frac_random=frac_random,
    moves=moves,
    compute_pp=compute_pp,
    compute_ll=compute_ll,
    thin=thin,
    thin_pp=thin_pp,
    mle_method=mle_method,
    mle_strategy=mle_strategy,
    n_jobs=N_CORES,
    verbose=2,
    progress_notebook=True,
    random_state=SEED,  # change to 'rng' for different outputs each time
)
if FAST_RUN:
    # Cross-validate and compute test metrics in one call
    df_metrics_emcee = utils.run_bayesian_model(
        reg_emcee,
        X,
        Y,
        X_test,
        Y_test,
        folds,
        n_jobs=N_CORES,
        prefix="emcee",
        compute_metrics=COMPUTE_METRICS_FAST_RUN,
        verbose=True,
        notebook=True,
        random_state=SEED,
    )
    # DataFrame.append is deprecated (removed in pandas 2.0); use pd.concat
    if COMPUTE_MLE:
        df_metrics_emcee = pd.concat([df_metrics_emcee, df_metrics_mle])
    if FIT_SK:
        df_metrics_emcee = pd.concat([df_metrics_emcee, df_metrics_sk])
    # Sort by the second-to-last metric column (ascending)
    df_metrics_emcee.sort_values(
        df_metrics_emcee.columns[-2],
        inplace=True
    )
    display(df_metrics_emcee.style.hide_index())
else:
    reg_emcee.fit(X, Y)
print(f"Mean acceptance: {100*reg_emcee.mean_acceptance():.3f}%")
idata_emcee = reg_emcee.get_idata()
We analyze the samples of all chains, discarding a few times the integrated autocorrelation times worth of samples. We could also perform thinning and take only every $k$-th value. The effective sample size is computed as suggested here (p.66). Missing values are replaced with a value of $0$.
# -- Sampler statistics and trace (with burn-in and thinning)
# Get number of samples
n_samples_emcee = reg_emcee.total_samples()
# Get autocorrelation times
autocorr = reg_emcee.autocorrelation_times()
# Effective i.i.d. sample size = total samples / autocorrelation time
pd.DataFrame(
    zip(theta_labels, autocorr, n_samples_emcee/autocorr),
    columns=["", "Autocorrelation times", "Effective i.i.d samples"]
).style.hide_index()
# -- Trace summary
reg_emcee.summary()
# -- Trace plot
az.plot_trace(
    idata_emcee,
    labeller=theta_labeller,
    combined=True,  # overlay all chains in a single density
    var_names=theta_names
)
print("Combined density and trace plot:")
# -- Marginal posterior plot
plot_utils.plot_posterior(
    idata_emcee,
    theta_space,
    gridsize=(NROWS(theta_space.n_dim), NCOLS),
    textsize=20,
)
print("Marginal posterior distributions:")
We can perform a couple of visual posterior predictive checks. In particular:
We also show the Bayesian p-value for several statistics, which is defined as $P(T(y^*)\leq T(y)\mid y)$, and is computed by simply measuring the proportion of generated samples $\{T(Y^*_m)\}_m$ that fall below the real value of the statistic. It is expected to be around $0.5$ when the model accurately represents the data.
# -- Generate and plot posterior predictive checks from X
# Reuse the posterior predictive samples if they were produced during the
# fit; otherwise generate them now and merge them into the InferenceData.
if "posterior_predictive" not in idata_emcee:
    pp = bayesian_model.generate_pp(
        idata_emcee, X, theta_space, rng=rng, verbose=True)
    utils.pp_to_idata([pp], idata_emcee, ['y_star'], merge=True)
else:
    pp = idata_emcee.posterior_predictive['y_star'].to_numpy()
# Posterior predictive checks
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X$")
plot_utils.plot_ppc(
    idata_emcee,
    n_samples=500,
    ax=axs[0],
    data_pairs={'y_obs': 'y_star'}
)
# Distribution of the posterior predictive mean vs. the observed mean
az.plot_bpv(idata_emcee,
            kind='t_stat',
            t_stat='mean',
            ax=axs[1],
            plot_mean=False,
            data_pairs={'y_obs': 'y_star'},
            bpv=False
            )
axs[1].axvline(Y.mean(),
               ls="--",
               color="r",
               lw=2,
               label=r"$\bar Y$"
               )
handles, labels = axs[1].get_legend_handles_labels()
handles.extend([Line2D([0], [0], label=r"Distribution of $\bar Y^*$")])
axs[1].legend(handles=handles)
# Show Bayesian p-values
# bpv = P(T(y*) <= T(y) | y); values near 0.5 indicate a good fit
for name, stat in point_estimators_bpv:
    bpv = bayesian_model.bpv(pp, Y, stat)
    print(f"bpv [T={name}]: {bpv:.3f}")
# -- Autocorrelation
plot_utils.plot_autocorr(
    idata_emcee,
    theta_space,
    gridsize=(NROWS(theta_space.n_dim), NCOLS)
)
print("Combined autocorrelation times:")
# -- Generate and plot posterior predictive checks from X_test
# Posterior predictive checks
# Generate predictive samples for the held-out regressors
pp_test = bayesian_model.generate_pp(
    idata_emcee,
    X_test,
    theta_space,
    rng=rng,
    verbose=True,
)
idata_pp_test = utils.pp_to_idata(
    [pp_test], idata_emcee, ['y_star'], y_obs=Y_test)
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X_{test}$")
plot_utils.plot_ppc(
    idata_pp_test,
    n_samples=500,
    data_pairs={'y_obs': 'y_star'},
    ax=axs[0]
)
az.plot_bpv(
    idata_pp_test,
    kind='t_stat',
    t_stat='mean',
    data_pairs={'y_obs': 'y_star'},
    plot_mean=False,
    ax=axs[1],
    bpv=False
)
# Mark the observed test mean against the predictive distribution
axs[1].axvline(
    Y_test.mean(),
    ls="--",
    color="r",
    lw=2,
    label=r"$\bar Y_{test}$"
)
handles, labels = axs[1].get_legend_handles_labels()
handles.extend([Line2D([0], [0], label=r"Distribution of $\bar Y^*_{test}$")])
_ = axs[1].legend(handles=handles)
# Show Bayesian p-values
# Values near 0.5 indicate the model reproduces the statistic well
for name, stat in point_estimators_bpv:
    bpv = bayesian_model.bpv(pp_test, Y_test, stat)
    print(f"bpv [T={name}]: {bpv:.3f}")
# -- Compute metrics using several point estimates
# Posterior mean estimate
# Thin the predictive samples, then average over chains and draws
Y_pred_pp = pp_test[:, ::thin_pp, :].mean(axis=(0, 1))
df_metrics_emcee = utils.linear_regression_metrics(
    Y_test,
    Y_pred_pp,
    reg_emcee.n_components("posterior_mean"),
    "emcee_posterior_mean"
)
# Point estimates
# Predict using each available point estimate of the posterior parameters
for pe in reg_emcee.default_point_estimates:
    Y_pred_pe = reg_emcee.predict(X_test, strategy=pe)
    df_metrics_emcee = utils.linear_regression_metrics(
        Y_test,
        Y_pred_pe,
        reg_emcee.n_components(pe),
        "emcee_" + pe,
        df=df_metrics_emcee,
    )
df_metrics_emcee.style.hide_index()
# -- Test variable selection procedure
# Reduce the predictors via the sampler's variable selection, then fit
# standard multiple linear regression models on the reduced data
df_metrics_emcee_var_sel = None
for pe in reg_emcee.default_point_estimates:
    X_red = reg_emcee.transform(X, pe=pe)
    X_test_red = reg_emcee.transform(X_test, pe=pe)
    df_metrics_emcee_var_sel = utils.multiple_linear_regression_cv(
        X_red,
        Y,
        X_test_red,
        Y_test,
        folds,
        n_jobs=N_CORES,
        prefix="emcee",
        pe=pe,
        df=df_metrics_emcee_var_sel,
    )
df_metrics_emcee_var_sel.style.hide_index()
This is only for testing purposes; in a production environment one should use the Backends feature of emcee.
# -- Save
# Quick-and-dirty persistence of the InferenceData via pickle (for a real
# deployment, prefer the emcee Backends feature mentioned above).
with open("emcee-p-fixed.idata", "wb") as f:
    pickle.dump(idata_emcee, f)
# -- Load
with open("emcee-p-fixed.idata", "rb") as f:
    idata_emcee = pickle.load(f)
# Extract the raw posterior draws as a plain array
trace = idata_emcee.posterior.to_array().to_numpy().T
trace_flat = trace.reshape(-1, trace.shape[-1])  # all chains combined
TODO (v4)
from mcmc_sampler import BayesianLinearRegressionPymc
import pymc3 as pm
# -- Sampler parameters
# One chain per available core; choose the step method via USE_NUTS.
n_chains = N_CORES
USE_NUTS = False
if USE_NUTS:
    n_samples, n_tune = 500, 500
    step_fn = pm.NUTS
    step_kwargs = {"target_accept": 0.8}
else:
    # Metropolis is cheaper per iteration, so draw more samples
    n_samples, n_tune = 5000, 1000
    step_fn = pm.Metropolis
    step_kwargs = {}
burn, thin, thin_pp = 0, 1, 5
FAST_RUN = True
COMPUTE_METRICS_FAST_RUN = True
# -- Run sampler
reg_pymc = BayesianLinearRegressionPymc(
    theta_space,
    n_chains,
    n_samples,
    b0='mle',
    g=g,
    eta=eta,
    prior_p=prior_p,
    step_fn=step_fn,
    step_kwargs=step_kwargs,
    n_iter_warmup=n_tune,
    thin=thin,
    thin_pp=thin_pp,
    burn=burn,
    mle_method=mle_method,
    mle_strategy=mle_strategy,
    n_jobs=N_CORES,
    verbose=2,
    random_state=SEED
)
print(f"-- Running pymc sampler with {N_CORES} cores --")
if FAST_RUN:
    # Cross-validate and compute test metrics in one call
    df_metrics_pymc = utils.run_bayesian_model(
        reg_pymc,
        X,
        Y,
        X_test,
        Y_test,
        folds,
        n_jobs=N_CORES,
        prefix="pymc",
        compute_metrics=COMPUTE_METRICS_FAST_RUN,
        verbose=True,
        notebook=True,
        random_state=SEED,
    )
    # DataFrame.append is deprecated (removed in pandas 2.0); use pd.concat
    if COMPUTE_MLE:
        df_metrics_pymc = pd.concat([df_metrics_pymc, df_metrics_mle])
    if FIT_SK:
        df_metrics_pymc = pd.concat([df_metrics_pymc, df_metrics_sk])
    # Sort by the second-to-last metric column (ascending)
    df_metrics_pymc.sort_values(
        df_metrics_pymc.columns[-2],
        inplace=True
    )
    display(df_metrics_pymc.style.hide_index())
else:
    reg_pymc.fit(X, Y)
print(f"Mean acceptance: {100*reg_pymc.mean_acceptance():.3f}%")
idata_pymc = reg_pymc.get_idata()
Since the tuning iterations already serve as burn-in, we keep the whole trace. In addition, we could consider thinning the samples.
# -- Trace summary
reg_pymc.summary(stats='all')
# -- Trace plot
az.plot_trace(
    idata_pymc,
    labeller=theta_labeller,
    combined=True,  # overlay all chains in a single density
    var_names=theta_names
)
print("Combined density and trace plot:")
# -- Marginal posterior plot
plot_utils.plot_posterior(
    idata_pymc,
    theta_space,
    gridsize=(NROWS(theta_space.n_dim), NCOLS),
    textsize=20,
)
print("Marginal posterior distributions:")
# -- Generate and plot posterior predictive checks from X
pp = bayesian_model.generate_pp(
    idata_pymc, X, theta_space, rng=rng, verbose=True)
utils.pp_to_idata([pp], idata_pymc, ['y_star'], merge=True)
# Posterior predictive checks
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X$")
plot_utils.plot_ppc(
    idata_pymc,
    n_samples=500,
    ax=axs[0],
    data_pairs={"y_obs": "y_star"}
)
# Distribution of the posterior predictive mean vs. the observed mean
az.plot_bpv(
    idata_pymc,
    kind='t_stat',
    t_stat='mean',
    plot_mean=False,
    ax=axs[1],
    bpv=False,
    data_pairs={"y_obs": "y_star"}
)
axs[1].axvline(
    Y.mean(),
    ls="--",
    color="r",
    lw=2,
    label=r"$\bar Y$"
)
handles, labels = axs[1].get_legend_handles_labels()
handles.extend([Line2D([0], [0], label=r"Distribution of $\bar Y^*$")])
_ = axs[1].legend(handles=handles)
# Show Bayesian p-values
# bpv = P(T(y*) <= T(y) | y); values near 0.5 indicate a good fit
for name, stat in point_estimators_bpv:
    bpv = bayesian_model.bpv(pp, Y, stat)
    print(f"bpv [T={name}]: {bpv:.3f}")
# -- Autocorrelation
plot_utils.plot_autocorr(
    idata_pymc,
    theta_space,
    gridsize=(NROWS(theta_space.n_dim), NCOLS)
)
print("Combined autocorrelation times:")
# -- Graphical model
print("Graphical model:")
reg_pymc.to_graphviz()
First we take a look at the distribution of predictions on a previously unseen dataset.
# -- Generate and plot posterior predictive checks from X_test
# Generate predictive samples for the held-out regressors
pp_test = bayesian_model.generate_pp(
    idata_pymc, X_test, theta_space, rng=rng, verbose=True)
idata_pp_test = utils.pp_to_idata(
    [pp_test], idata_pymc, ['y_star'], y_obs=Y_test)
# Posterior predictive checks
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X_{test}$")
plot_utils.plot_ppc(
    idata_pp_test,
    n_samples=500,
    ax=axs[0],
    data_pairs={"y_obs": "y_star"}
)
az.plot_bpv(
    idata_pp_test,
    kind='t_stat',
    t_stat='mean',
    plot_mean=False,
    ax=axs[1],
    bpv=False,
    data_pairs={"y_obs": "y_star"}
)
# Mark the observed test mean against the predictive distribution
axs[1].axvline(
    Y_test.mean(),
    ls="--",
    color="r",
    lw=2,
    label=r"$\bar Y_{test}$"
)
handles, labels = axs[1].get_legend_handles_labels()
handles.extend([Line2D([0], [0], label=r"Distribution of $\bar Y_{test}^*$")])
_ = axs[1].legend(handles=handles)
# Show Bayesian p-values
# Values near 0.5 indicate the model reproduces the statistic well
for name, stat in point_estimators_bpv:
    bpv = bayesian_model.bpv(pp_test, Y_test, stat)
    print(f"bpv [T={name}]: {bpv:.3f}")
Next we look at the MSE when using several point-estimates for the parameters, as well as the mean of the posterior samples.
# -- Compute metrics using several point estimates
# Posterior mean estimate
# Thin the predictive samples, then average over chains and draws
Y_pred_pp = pp_test[:, ::thin_pp, :].mean(axis=(0, 1))
df_metrics_pymc = utils.linear_regression_metrics(
    Y_test,
    Y_pred_pp,
    reg_pymc.n_components("posterior_mean"),
    "pymc_posterior_mean"
)
# Point estimates
# Predict using each available point estimate of the posterior parameters
for pe in reg_pymc.default_point_estimates:
    Y_pred_pe = reg_pymc.predict(X_test, strategy=pe)
    df_metrics_pymc = utils.linear_regression_metrics(
        Y_test,
        Y_pred_pe,
        reg_pymc.n_components(pe),
        "pymc_" + pe,
        df=df_metrics_pymc,
    )
df_metrics_pymc.style.hide_index()
# -- Test variable selection procedure
# Reduce the predictors via the sampler's variable selection, then fit
# standard multiple linear regression models on the reduced data
df_metrics_pymc_var_sel = None
for pe in reg_pymc.default_point_estimates:
    X_red = reg_pymc.transform(X, pe=pe)
    X_test_red = reg_pymc.transform(X_test, pe=pe)
    df_metrics_pymc_var_sel = utils.multiple_linear_regression_cv(
        X_red,
        Y,
        X_test_red,
        Y_test,
        folds,
        n_jobs=N_CORES,
        prefix="pymc",
        pe=pe,
        df=df_metrics_pymc_var_sel,
    )
df_metrics_pymc_var_sel.style.hide_index()
# -- Save
# Persist the InferenceData in NetCDF, Arviz's native serialization format
_ = idata_pymc.to_netcdf("pymc-p-fixed.nc")
# -- Load
idata_pymc = az.from_netcdf("pymc-p-fixed.nc")
# Record package versions and timestamps for reproducibility (IPython magics)
%load_ext watermark
%watermark -n -u -v -iv -w